package com.jason.photography.api.utl;

import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.CharsetUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.core.util.URLUtil;
import cn.hutool.http.HttpUtil;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.google.gson.Gson;
import com.jason.common.core.exception.BizException;
import com.jason.common.http.service.OkHttpService;
import com.jason.photography.api.service.impl.Spider;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Crawler utility.
 *
 * <p>Walks the paginated search results for one author, builds a {@link Spider} for every
 * result link, writes the collected data to a record file under
 * {@code saveLocalDir/author/} and finally asks every {@link Spider} to write its images
 * to disk. A previously written record file can be replayed with
 * {@link #readRecordFileSaveImg(int, int)}.
 *
 * <p>Pages and albums are processed in parallel through
 * {@link CompletableFuture#runAsync(Runnable)}; the state shared between those tasks
 * ({@code parseMap}, {@code errPageNumList}) therefore uses thread-safe collections.
 *
 * @author gzc
 * @since 2024/3/2 12:44
 **/
@Slf4j
public class SpiderUtil {
    /** Search URL prefix; the URL-encoded author is appended when no explicit URL is given. */
    private static final String DEFAULT_SEARCH_URL = "https://www.xr03.vip/plus/search/index.asp?keyword=";
    /** Name of the record file stored under {@code saveLocalDir/author/}. */
    private static final String RECORD_FILE_NAME = "数据记录.txt";
    /** Timeout in milliseconds for the Hutool HTTP fallback. */
    private static final int HTTP_TIMEOUT_MS = 30000;
    /** Description passed to {@link OkHttpService#doGet} for search page requests. */
    private static final String FETCH_DESCRIPTION = "获取搜索页html";

    private final String author;
    private final String searchUrl;
    public final String saveLocalDir;
    /** Page number (1-based) -> (album title -> spider). Written concurrently by page tasks. */
    private final Map<Integer, Map<String, Spider>> parseMap = new ConcurrentHashMap<>(256);

    /** Optional HTTP client; when {@code null}, Hutool's {@link HttpUtil} is used instead. */
    private OkHttpService okHttpService;

    private final Map<String, List<Integer>> errMsgMap = new HashMap<>(16);
    /** Page numbers whose scraping failed. Written concurrently by page tasks. */
    private final List<Integer> errPageNumList = Collections.synchronizedList(new ArrayList<>(16));

    /**
     * Creates a crawler that searches the default site for {@code author}.
     *
     * @param author       search keyword; also used as the sub-directory name for the record file
     * @param saveLocalDir root directory for the record file and the downloaded images
     */
    public SpiderUtil(String author, String saveLocalDir) {
        this(author, saveLocalDir, DEFAULT_SEARCH_URL + URLUtil.encode(author));
    }

    /**
     * Creates a crawler that starts from an explicit search URL.
     *
     * @param author       author name; used as the sub-directory name for the record file
     * @param saveLocalDir root directory for the record file and the downloaded images
     * @param searchUrl    complete URL of the first search result page
     */
    public SpiderUtil(String author, String saveLocalDir, String searchUrl) {
        this.saveLocalDir = saveLocalDir;
        this.author = author;
        this.searchUrl = searchUrl;
    }

    /**
     * Scrapes every search result page.
     *
     * @throws Exception if the search URL is malformed or the first search page cannot be fetched
     */
    public void searchRun() throws Exception {
        searchRun(null);
    }

    /**
     * Scrapes the search result pages, records the result and saves the images.
     *
     * @param addPageCountDataList 1-based page numbers to scrape; when {@code null} or empty
     *                             every page is scraped. When pages are selected and a record
     *                             file already exists, the new data is appended to it instead
     *                             of replacing it.
     * @throws Exception if the search URL is malformed or the first search page cannot be fetched
     */
    public void searchRun(List<Integer> addPageCountDataList) throws Exception {
        long begin1 = System.currentTimeMillis();
        List<String> searchAllPage = this.searchAllPage(searchUrl);
        boolean selectedPagesOnly = CollUtil.isNotEmpty(addPageCountDataList);
        List<CompletableFuture<Void>> futureList = new ArrayList<>(64);
        // Schedule each wanted page once; contains() avoids scraping a page twice when the
        // requested list holds duplicates and tolerates null entries.
        for (int i = 0; i < searchAllPage.size(); i++) {
            if (!selectedPagesOnly || addPageCountDataList.contains(i + 1)) {
                futureList.add(extracted(searchAllPage, i, author));
            }
        }
        // Wait for all page tasks; they handle their own failures, so join() does not throw.
        CompletableFuture.allOf(futureList.toArray(new CompletableFuture<?>[0])).join();
        // Expose a snapshot so callers never iterate the synchronized list directly.
        this.errMsgMap.put(author, new ArrayList<>(errPageNumList));
        long between1 = System.currentTimeMillis() - begin1;
        System.out.println("检索总据耗时s->" + (between1 / 1000));

        // Report what was collected.
        int photoAlbumNum = parseMap.values().stream().mapToInt(Map::size).sum();
        System.out.println("页总数->" + parseMap.size() + ", 写真集总数->" + photoAlbumNum);

        // The record file stores the JSON object WITHOUT its outer braces so that later runs
        // can append ",<more entries>"; readRecordFileSaveImg adds the braces back.
        String json = new Gson().toJson(parseMap);
        String recordFilePath = recordFilePath();
        String substring = json.substring(1, json.length() - 1);
        if (FileUtil.exist(recordFilePath) && selectedPagesOnly) {
            // Never append a bare separator, and do not start an empty file with a comma:
            // both would leave a dangling comma in the reconstructed JSON.
            if (!substring.isEmpty()) {
                String separator = FileUtil.file(recordFilePath).length() > 0 ? "," : "";
                FileUtil.appendUtf8String(separator + substring, recordFilePath);
            }
        } else {
            FileUtil.writeString(substring, recordFilePath, CharsetUtil.CHARSET_UTF_8);
        }
        // Write the images to disk.
        saveFileToLocal(this.parseMap);
    }

    /**
     * Writes the images of every album in {@code reqParseMap} below {@link #saveLocalDir}.
     *
     * <p>Pages are handled one after another; the albums of a page are written in parallel.
     * A failing album is logged and does not stop the remaining albums or pages.
     *
     * @param reqParseMap page number -> (album title -> spider)
     */
    public void saveFileToLocal(Map<Integer, Map<String, Spider>> reqParseMap) {
        System.out.println("开始保存图片到本地");
        int curr = 1;
        for (Map.Entry<Integer, Map<String, Spider>> pageEntry : reqParseMap.entrySet()) {
            Integer pageNum = pageEntry.getKey();
            long begin = System.currentTimeMillis();
            List<CompletableFuture<Void>> futureList = new ArrayList<>(64);
            try {
                pageEntry.getValue().forEach((title, spider) ->
                        futureList.add(CompletableFuture.runAsync(() -> writeAlbum(pageNum, title, spider))));
            } catch (Exception e) {
                System.out.println("第" + pageNum + "页发生异常");
                log.error("保存图片到本地发生异常->", e);
            }
            // Wait for the albums of this page before moving on to the next page.
            CompletableFuture.allOf(futureList.toArray(new CompletableFuture<?>[0])).join();
            long between = System.currentTimeMillis() - begin;
            System.out.println("已处理完第" + curr + "页数据，耗时->" + (between / 1000) + "秒");
            curr++;
        }
    }

    /**
     * Writes one album to disk. Failures are caught here, inside the asynchronous task,
     * so that they cannot escape through {@code join()} and abort the other albums.
     */
    private void writeAlbum(Integer pageNum, String title, Spider spider) {
        try {
            OkHttpService client = okHttpService;
            if (client != null) {
                spider.enableOkHttp(client);
            }
            spider.writeImgFile(saveLocalDir);
        } catch (Throwable e) {
            log.error("第" + pageNum + "页，名称->" + title + "发生异常", e);
        }
    }

    /**
     * Schedules the scraping of one search result page.
     *
     * <p>The task stores its result in {@code parseMap} under the 1-based page number. If the
     * page has no results or scraping fails, the page number is added to
     * {@code errPageNumList} and the error is logged; the returned future still completes
     * normally.
     *
     * @param searchAllPage URLs of all search result pages
     * @param i             0-based index into {@code searchAllPage}
     * @param author        author assigned to every spider created for this page
     * @return future that completes when the page has been processed
     */
    private CompletableFuture<Void> extracted(List<String> searchAllPage, int i, String author) {
        return CompletableFuture.runAsync(() -> {
            int pageNum = i + 1;
            System.out.println("开始爬取第" + pageNum + "页数据");
            long begin = System.currentTimeMillis();
            try {
                List<String> searchLinks = searchLinks(searchAllPage.get(i));
                if (CollUtil.isEmpty(searchLinks)) {
                    throw new BizException("第" + pageNum + "页 没有数据");
                }
                Map<String, Spider> map = new HashMap<>(10);
                // Visit every search result of the current page; one bad album must not
                // discard the rest of the page.
                for (String url : searchLinks) {
                    try {
                        Spider spider = new Spider(url);
                        spider.execute();
                        spider.setAuthor(author);
                        map.put(spider.getTitle(), spider);
                    } catch (Throwable e) {
                        System.out.println("执行地址->" + url + "发生异常");
                        log.error("遍历当前搜索页所有搜索结果发生异常->", e);
                    }
                }

                parseMap.put(pageNum, map);
            } catch (Throwable throwable) {
                // Remember every failed page (empty or broken) so callers can retry it.
                this.errPageNumList.add(pageNum);
                log.error("爬取第" + pageNum + "页数据发生异常->", throwable);
            }
            long between = System.currentTimeMillis() - begin;
            System.out.println("爬取第" + pageNum + "页数据耗时->" + (between / 1000) + "秒");
        });
    }

    /**
     * Collects the links of all search result pages from the pagination element(s)
     * (CSS class {@code page}) of the first search page.
     *
     * @param searchHomeUrl URL of the first search page
     * @return page URLs in document order; each is scheme + authority + path of
     *         {@code searchHomeUrl} followed by the link's {@code href}
     * @throws URISyntaxException if {@code searchHomeUrl} is not a valid URI
     */
    private List<String> searchAllPage(String searchHomeUrl) throws URISyntaxException {
        URI uri = new URI(searchHomeUrl);
        String pagePrefix = origin(uri) + uri.getPath();
        List<String> searchLinkList = new ArrayList<>(16);
        Document document = Jsoup.parse(fetchHtml(searchHomeUrl));
        for (Element pager : document.getElementsByClass("page")) {
            for (Element anchor : pager.select("a[href]")) {
                String href = anchor.attr("href");
                if (StrUtil.isNotBlank(href)) {
                    searchLinkList.add(pagePrefix + href);
                }
            }
        }
        return searchLinkList;
    }

    /**
     * Collects the album links of one search result page: every {@code a[href]} inside an
     * {@code h2} inside an element with CSS class {@code sousuo}.
     *
     * @param searchUrl URL of the search result page
     * @return album URLs in document order; each is scheme + authority of {@code searchUrl}
     *         followed by the link's {@code href}
     * @throws URISyntaxException if {@code searchUrl} is not a valid URI
     */
    private List<String> searchLinks(String searchUrl) throws URISyntaxException {
        List<String> searchLinkList = new ArrayList<>(16);
        String linkPrefix = origin(new URI(searchUrl));
        Document document = Jsoup.parse(fetchHtml(searchUrl));
        for (Element result : document.getElementsByClass("sousuo")) {
            for (Element heading : result.select("h2")) {
                for (Element anchor : heading.select("a[href]")) {
                    String href = anchor.attr("href");
                    // Same blank check as searchAllPage: a blank href is not an album link.
                    if (StrUtil.isNotBlank(href)) {
                        searchLinkList.add(linkPrefix + href);
                    }
                }
            }
        }
        return searchLinkList;
    }

    /** Returns {@code scheme://authority} of {@code uri}; the authority keeps a port if present. */
    private static String origin(URI uri) {
        return uri.getScheme() + "://" + uri.getRawAuthority();
    }

    /** Downloads {@code url} with the configured {@link OkHttpService}, or with Hutool if none is set. */
    private String fetchHtml(String url) {
        OkHttpService client = okHttpService;
        return client == null
                ? HttpUtil.get(url, HTTP_TIMEOUT_MS)
                : client.doGet(FETCH_DESCRIPTION, url);
    }

    /** Returns the path of the record file: {@code saveLocalDir/author/<record file name>}. */
    private String recordFilePath() {
        return saveLocalDir + File.separator + author + File.separator + RECORD_FILE_NAME;
    }

    /**
     * Replays the whole record file and saves the images of every recorded album.
     */
    public void readRecordFileSaveImg() {
        readRecordFileSaveImg(1, 1);
    }

    /**
     * Replays the record file starting at a given position and saves the images.
     *
     * <p>Positions are 1-based and follow the order of the entries in the record file (they
     * are not the page numbers used as keys). Pages before {@code beginPageNum} are skipped
     * completely. On the page at {@code beginPageNum} the albums before {@code beginNum} are
     * skipped; all later pages are processed in full.
     *
     * @param beginPageNum 1-based position of the first page to process
     * @param beginNum     1-based position of the first album to process on that first page
     */
    public void readRecordFileSaveImg(int beginPageNum, int beginNum) {
        String readString = FileUtil.readString(recordFilePath(), StandardCharsets.UTF_8);
        // The record file holds the object body only; restore the braces before parsing.
        String jsonStr = "{" + readString + "}";
        JSONObject parseObj = JSONUtil.parseObj(jsonStr);
        int pageIndex = 0;
        for (Map.Entry<String, Object> pageEntry : parseObj.entrySet()) {
            pageIndex++;
            if (pageIndex < beginPageNum) {
                continue;
            }
            // The album offset is a resume point, so it only applies to the first page.
            int firstAlbum = pageIndex == beginPageNum ? beginNum : 1;
            JSONObject entries = JSONUtil.parseObj(JSONUtil.toJsonStr(pageEntry.getValue()));
            Map<String, Spider> pageDataMap2 = new HashMap<>(256);
            int index = 0;
            for (Map.Entry<String, Object> albumEntry : entries.entrySet()) {
                index++;
                if (index < firstAlbum) {
                    continue;
                }
                JSONObject album = JSONUtil.parseObj(JSONUtil.toJsonStr(albumEntry.getValue()));
                Spider spider = new Spider(album.getStr("url"));
                spider.setTitle(album.getStr("title"));
                spider.setAuthor(album.getStr("author"));
                spider.setImgList(album.getBeanList("imgList", String.class));
                pageDataMap2.put(albumEntry.getKey(), spider);
            }
            this.parseMap.put(Integer.valueOf(pageEntry.getKey()), pageDataMap2);
        }
        saveFileToLocal(this.parseMap);
    }


    /**
     * Sets the HTTP client used for page requests and handed to every {@link Spider} before
     * it writes its images. Call before starting a run.
     *
     * @param okHttpService client to use, or {@code null} to fall back to Hutool's HTTP client
     */
    public void setOkHttpService(OkHttpService okHttpService) {
        this.okHttpService = okHttpService;
    }

    /**
     * Returns the failed pages recorded by the last {@code searchRun}.
     *
     * @return author -> 1-based numbers of the pages that had no results or failed to scrape
     */
    public Map<String, List<Integer>> getErrMsgMap() {
        return errMsgMap;
    }
}
